Carga del dataset
Code
# Load the donations table from the course dataset repository, parsing the
# `date` column as datetimes, then preview the first rows.
DONATIONS_URL = "https://raw.githubusercontent.com/erickedu85/dataset/master/donations.csv"
donations = pd.read_csv(DONATIONS_URL, parse_dates=["date"])
donations.head()
0
2020-01-02
62419.79
0.0
3342
0
18.677376
1040.00
879029.00
0.0
Thu
1
2020-01-03
37983.67
0.0
1949
0
19.488799
1000.00
917012.67
0.0
Fri
2
2020-01-04
26219.10
0.0
1337
0
19.610396
208.00
943231.77
0.0
Sat
3
2020-01-05
33856.07
0.0
1289
0
26.265376
5596.86
977087.84
0.0
Sun
4
2020-01-06
26447.11
0.0
1347
0
19.634083
699.67
1003534.95
0.0
Mon
Code
# Enable the built-in 'dark' theme for every chart rendered afterwards.
# `alt.themes` has been deprecated since Altair 5.5.0 in favour of `alt.theme`
# (the old call emits an AltairDeprecationWarning), so the new API is used.
alt.theme.enable('dark')
C:\Users\pedro\AppData\Local\Temp\ipykernel_8384\1193503845.py:2: AltairDeprecationWarning:
Deprecated since `altair=5.5.0`. Use altair.theme instead.
Most cases require only the following change:
# Deprecated
alt.themes.enable('quartz')
# Updated
alt.theme.enable('quartz')
If your code registers a theme, make the following change:
# Deprecated
def custom_theme():
return {'height': 400, 'width': 700}
alt.themes.register('theme_name', custom_theme)
alt.themes.enable('theme_name')
# Updated
@alt.theme.register('theme_name', enable=True)
def custom_theme():
return alt.theme.ThemeConfig(
{'height': 400, 'width': 700}
)
See the updated User Guide for further details:
https://altair-viz.github.io/user_guide/api.html#theme
https://altair-viz.github.io/user_guide/customization.html#chart-themes
ThemeRegistry.enable('dark')
Gráfico lineal
Code
# Line chart of the `sum` column over `date`; `.interactive()` makes the
# rendered chart pannable/zoomable.
donations_line = alt.Chart(donations).mark_line().encode(
    x=alt.X('date'),
    y=alt.Y('sum'),
)
donations_line.interactive()
Gráfico de barras
Code
# Weekday labels in calendar order, used to sort the bars.
list_week_day = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# Weekday whose `sum` values add up to the largest total.
totals_by_week_day = donations.groupby('week_day')['sum'].sum()
top_day = totals_by_week_day.idxmax()

# Coral for the top weekday, steel blue for all the others.
top_day_color = alt.condition(
    alt.datum.week_day == top_day,
    alt.value('coral'),
    alt.value('steelblue'),
)

# Horizontal bars: total of `sum` per weekday.
chart = alt.Chart(donations).mark_bar().encode(
    x=alt.X('sum(sum)'),
    y=alt.Y('week_day', sort=list_week_day),
    color=top_day_color,
)

# Text layer derived from the bar chart, labelling each bar with its total
# (d3 format '$.3s').
texto = chart.mark_text(align='left', dx=5).encode(
    text=alt.Text('sum(sum)', format='$.3s'),
)

alt.layer(chart, texto)
Técnicas de interacción
Bind legend
Code
# Point selection on `Origin`, driven by clicks on the legend entries.
click_legend = alt.selection_point(fields=['Origin'], bind='legend')

# Selected points are fully opaque, the rest are faded.
points_opacity_condition = alt.condition(click_legend, alt.value(1), alt.value(0.3))

# Scatter plot of horsepower against fuel efficiency, coloured by origin.
scatter = alt.Chart(cars).mark_circle().encode(
    x=alt.X('Horsepower'),
    y=alt.Y('Miles_per_Gallon'),
    color=alt.Color('Origin'),
    opacity=points_opacity_condition,
)
points = scatter.add_params(click_legend)
points
Selección de 2 gráficos diferentes
Code
# Point selection on `Origin`. It is registered on the bar chart only, but the
# opacity of both charts is conditioned on it.
click = alt.selection_point(fields=['Origin'])

# Points (top chart): opacity follows the selection made on the bars.
points_opacity_condition = alt.condition(click, alt.value(1), alt.value(0.3))
points = alt.Chart(cars).mark_circle().encode(
    x=alt.X('Horsepower'),
    y=alt.Y('Miles_per_Gallon'),
    color=alt.Color('Origin'),
    opacity=points_opacity_condition,
)

# Bars (bottom chart): number of rows per origin; this chart owns the selection.
bar_opacity_condition = alt.condition(click, alt.value(1), alt.value(0.3))
bars = alt.Chart(cars).mark_bar().encode(
    x=alt.X('count()'),
    y=alt.Y('Origin'),
    color=alt.Color('Origin'),
    opacity=bar_opacity_condition,
    tooltip=['count()'],
).add_params(click)

# Stack the scatter plot above the bar chart.
alt.vconcat(points, bars)
Movies
Code
# Load the extended movies table from the course dataset repository, parsing
# `Release_Date` as datetimes, then preview the first rows.
MOVIES_URL = "https://raw.githubusercontent.com/erickedu85/dataset/master/movies-extended.csv"
movies = pd.read_csv(MOVIES_URL, parse_dates=["Release_Date"])
movies.head()
0
Boynton Beach Club
3127472
2006-03-24
R
Romantic Comedy
NaN
NaN
1
Broken Arrow
148345997
1996-02-09
R
Action
55.0
5.8
2
Brazil
9929135
1985-12-18
R
Black Comedy
98.0
8.0
3
The Cable Guy
102825796
1996-06-14
PG-13
Comedy
52.0
5.8
4
Chain Reaction
60209334
1996-08-02
PG-13
Action
13.0
5.2
Code
# --- dropdown: one option per genre ---
# Missing values are dropped before sorting: sorted() raises TypeError when the
# column mixes strings with float NaN, and NaN is not a meaningful option.
unique_genres = sorted(movies['Major_Genre'].dropna().unique())
dropdown_genres = alt.binding_select(
    name="Seleccionar genero: ",
    options=unique_genres,
)

# --- radio buttons: one option per MPAA rating (same NaN handling) ---
unique_mpaaa = sorted(movies['MPAA_Rating'].dropna().unique())
radiobuttons_mpaa = alt.binding_radio(
    name="Seleccionar MPAA Rating: ",
    options=unique_mpaaa,
)

# Point selection over both fields, each bound to its own input widget.
select_genre_mpaa = alt.selection_point(
    fields=['Major_Genre', 'MPAA_Rating'],
    bind={
        'Major_Genre': dropdown_genres,
        'MPAA_Rating': radiobuttons_mpaa,
    },
)

# Movies matching the selection are opaque, the rest almost transparent.
opacity_condition = alt.condition(
    select_genre_mpaa,
    alt.value(1),
    alt.value(0.1),
)

# Scatter plot of Rotten Tomatoes rating against IMDB rating, coloured by genre.
points = alt.Chart(movies).mark_circle().encode(
    alt.X('Rotten_Tomatoes_Rating'),
    alt.Y('IMDB_Rating'),
    alt.Color('Major_Genre'),
    opacity=opacity_condition,
    tooltip=['Title'],
).add_params(select_genre_mpaa)
points
Slider
Code
# Upper bound of the slider: the largest worldwide gross in the dataset.
worldwide_gross_max = movies['Worldwide_Gross'].max()

# Range slider from 0 to the maximum gross, in steps of 10 million.
slider_gross = alt.binding_range(
    name='Worldwide Gross: ',
    min=0,
    max=worldwide_gross_max,
    step=10e6,
)

# Plain value parameter bound to the slider. It starts at 0 so the threshold
# is defined from the first render; a slider-bound point selection without an
# initial value leaves the threshold undefined (every point dimmed) until the
# slider is moved.
select_worldwige_gross = alt.param(value=0, bind=slider_gross)

# Movies grossing more than the slider threshold are opaque, the rest faded.
opacity_condition = alt.condition(
    alt.datum.Worldwide_Gross > select_worldwige_gross,
    alt.value(1),
    alt.value(0.1),
)

# Scatter plot of Rotten Tomatoes rating against IMDB rating, coloured by genre.
points = alt.Chart(movies).mark_circle().encode(
    alt.X('Rotten_Tomatoes_Rating'),
    alt.Y('IMDB_Rating'),
    alt.Color('Major_Genre'),
    opacity=opacity_condition,
    tooltip=['Title', 'Rotten_Tomatoes_Rating', 'IMDB_Rating', 'Major_Genre'],
).add_params(select_worldwige_gross).interactive()
points
Overview + details
Code
# Interval brush restricted to the x encoding, so the selection only constrains
# `Release_Date`, a field that exists in both charts.
brush = alt.selection_interval(encodings=['x'])

# Marks inside the brushed date range are opaque, the rest faded.
opacity_condition = alt.condition(
    brush,
    alt.value(1),
    alt.value(0.2),
)

# Overview: short bar chart of movie counts per release date. This is the only
# chart that owns the brush; registering the same parameter on both charts
# makes Altair emit an "Automatically deduplicated selection parameter" warning.
bars_slider = alt.Chart(movies).mark_bar().encode(
    alt.X('Release_Date'),
    alt.Y('count()'),
    opacity=opacity_condition,
).properties(
    height=50,
).add_params(brush)

# Details: ratings scatter plot whose opacity follows the brush drawn on the
# overview chart.
points = alt.Chart(movies).mark_circle().encode(
    alt.X('Rotten_Tomatoes_Rating'),
    alt.Y('IMDB_Rating'),
    alt.Color('Major_Genre'),
    opacity=opacity_condition,
    tooltip=['Title', 'Rotten_Tomatoes_Rating', 'IMDB_Rating', 'Major_Genre', 'Release_Date'],
)

points & bars_slider
C:\Users\pedro\AppData\Local\Temp\ipykernel_8384\4091028901.py:26: UserWarning:
Automatically deduplicated selection parameter with identical configuration. If you want independent parameters, explicitly name them differently (e.g., name='param1', name='param2'). See https://github.com/vega/altair/issues/3891
Minimap - Release Date
Code
select_date = alt.selection_interval()
base = alt.Chart(movies).mark_line(point= True ).encode(
alt.X('Release_Date' ),
alt.Y('mean(Worldwide_Gross)' )
).properties(
width= 500
)
# base
##details
upper_chart = base.encode(
alt.X('Release_Date' , scale= alt.Scale(domain= select_date))
).properties(height= 180 )
lower_chart = base.add_params(select_date).properties(height= 50 )
upper_chart & lower_chart
Dataset movies from vega
Code
import altair as alt
import pandas as pd
from vega_datasets import data
# Load the movies dataset bundled with vega_datasets.
movies_cleaned = data.movies()
# NOTE(review): in a notebook only a cell's last expression is displayed, so
# this preview is presumably discarded here — confirm whether it is needed.
movies_cleaned.head()
# Print the column names, non-null counts and dtypes.
movies_cleaned.info()
<class 'pandas.DataFrame'>
RangeIndex: 3201 entries, 0 to 3200
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Title 3200 non-null object
1 US_Gross 3194 non-null float64
2 Worldwide_Gross 3194 non-null float64
3 US_DVD_Sales 564 non-null float64
4 Production_Budget 3200 non-null float64
5 Release_Date 3201 non-null str
6 MPAA_Rating 2596 non-null str
7 Running_Time_min 1209 non-null float64
8 Distributor 2969 non-null str
9 Source 2836 non-null str
10 Major_Genre 2926 non-null str
11 Creative_Type 2755 non-null str
12 Director 1870 non-null str
13 Rotten_Tomatoes_Rating 2321 non-null float64
14 IMDB_Rating 2988 non-null float64
15 IMDB_Votes 2988 non-null float64
dtypes: float64(8), object(1), str(7)
memory usage: 400.3+ KB
Code
# Convert `Release_Date` (loaded as strings, per the info() output) to datetimes
# so the charts below can apply time units such as year()/month()/date().
movies_cleaned['Release_Date' ] = pd.to_datetime(movies_cleaned['Release_Date' ])
mark line
Code
# Line chart (with point markers) of total US gross per release year.
yearly_gross_line = alt.Chart(movies_cleaned).mark_line(point=True).encode(
    x=alt.X('year(Release_Date)'),
    y=alt.Y('sum(US_Gross)'),
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('sum(US_Gross)'),
    ],
)
yearly_gross_line.properties(width=600, height=300).interactive()
mark trail
Code
# Trail chart of total US gross per release year; the stroke width encodes the
# mean US gross of that year.
yearly_gross_trail = alt.Chart(movies_cleaned).mark_trail().encode(
    x=alt.X('year(Release_Date)'),
    y=alt.Y('sum(US_Gross)'),
    size=alt.Size('mean(US_Gross)'),
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('mean(US_Gross)'),
    ],
)
yearly_gross_trail.properties(width=600, height=300).interactive()
suma acumulada
Code
# Running total of US gross over time, computed with a window transform.
cumulative_gross = alt.Chart(movies_cleaned).transform_window(
    # Process the rows in chronological order.
    sort=[{"field": "Release_Date"}],
    # New field holding the sum of US_Gross accumulated so far.
    cumulative_us_gross='sum(US_Gross)',
).mark_line(point=True)

cumulative_gross.encode(
    x=alt.X('Release_Date'),
    y=alt.Y('cumulative_us_gross:Q'),
    tooltip=[alt.Tooltip('year(Release_Date):T')],
).properties(
    width=600,
    height=300,
).interactive()
area
Code
# Blue area chart (smoothed with 'basis' interpolation, outlined with a line)
# of the mean US gross per release year.
mean_gross_area = alt.Chart(movies_cleaned).mark_area(
    color='blue',
    interpolate='basis',
    line=True,
)
mean_gross_area.encode(
    x=alt.X('year(Release_Date)'),
    y=alt.Y('mean(US_Gross)'),
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('mean(US_Gross):Q'),
    ],
).properties(width=600, height=300).interactive()
area gradiente de color
Code
# Linear gradient from white (offset 0) to dark green (offset 1). With
# x1 == x2 the gradient only varies along y, from y1=1 to y2=0.
white_to_green = alt.Gradient(
    gradient='linear',
    stops=[
        alt.GradientStop(color='white', offset=0),
        alt.GradientStop(color='darkgreen', offset=1),
    ],
    x1=1,
    x2=1,
    y1=1,
    y2=0,
)

# Smoothed area chart of the mean US gross per release year, filled with the
# gradient defined above.
gradient_area = alt.Chart(movies_cleaned).mark_area(
    interpolate='basis',
    line=True,
    color=white_to_green,
)
gradient_area.encode(
    x=alt.X('year(Release_Date)'),
    y=alt.Y('mean(US_Gross)'),
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('mean(US_Gross):Q'),
    ],
).properties(width=600, height=300).interactive()
highlight
Code
# Hover selection on `Major_Genre`: set on pointer-over, cleared on mouse-out.
highlight = alt.selection_point(
    on="pointerover",
    fields=["Major_Genre"],
    clear="mouseout",
)

# The hovered genre is fully opaque, the other genres are faded.
genre_opacity = alt.condition(highlight, alt.value(1), alt.value(0.1))

# Hollow point markers (white fill) overlaid on each line.
hollow_points = alt.OverlayMarkDef(filled=False, fill="white")

# One line per genre: mean US gross per release year.
genre_lines = alt.Chart(movies_cleaned).mark_line(point=hollow_points).encode(
    x=alt.X('year(Release_Date)'),
    y=alt.Y('mean(US_Gross)'),
    color=alt.Color('Major_Genre'),
    opacity=genre_opacity,
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('mean(US_Gross)'),
    ],
)
genre_lines.add_params(highlight).properties(width=600, height=300).interactive()
highlight area
Code
# d3 format used for money values on the axis and in the tooltip.
format_gross = '$.3s'

# Hover selection on `Major_Genre`: set on pointer-over, cleared on mouse-out.
highlight = alt.selection_point(
    on="pointerover",
    fields=["Major_Genre"],
    clear="mouseout",
)

# The hovered genre is fully opaque, the other genres are faded.
genre_opacity = alt.condition(highlight, alt.value(1), alt.value(0.3))

# Smoothed area chart per genre: total US gross per release year.
genre_areas = alt.Chart(movies_cleaned).mark_area(interpolate='basis', line=True)
genre_areas.encode(
    alt.X('year(Release_Date)'),
    alt.Y('sum(US_Gross)', axis=alt.Axis(format=format_gross)),
    alt.Color('Major_Genre:N'),
    opacity=genre_opacity,
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('Major_Genre'),
        alt.Tooltip('sum(US_Gross)', format=format_gross),
    ],
).add_params(highlight).properties(width=600, height=300).interactive()
small multiple
Code
# d3 format used for money values on the axis and in the tooltip.
format_gross = '$.3s'

# Hover selection on `Major_Genre`: set on pointer-over, cleared on mouse-out.
highlight = alt.selection_point(
    on="pointerover",
    fields=["Major_Genre"],
    clear="mouseout",
)

# The hovered genre is fully opaque, the other genres are faded.
genre_opacity = alt.condition(highlight, alt.value(1), alt.value(0.3))

# Small multiples: one 70-pixel-tall row per genre, each a smoothed area chart
# of total US gross per release year.
genre_rows = alt.Chart(movies_cleaned).mark_area(interpolate='basis', line=True)
genre_rows.encode(
    alt.X('year(Release_Date)'),
    alt.Y('sum(US_Gross)', title='Sum US Gross', axis=alt.Axis(format=format_gross)),
    alt.Row('Major_Genre:N'),
    alt.Color('Major_Genre:N'),
    opacity=genre_opacity,
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('Major_Genre'),
        alt.Tooltip('sum(US_Gross)', format=format_gross),
    ],
).add_params(highlight).properties(width=600, height=70).interactive()
streamgraph
Code
# d3 format used for money values in the tooltip.
format_gross = '$.3s'

# Hover selection on `Major_Genre`: set on pointer-over, cleared on mouse-out.
highlight = alt.selection_point(
    on="pointerover",
    fields=["Major_Genre"],
    clear="mouseout",
)

# The hovered genre is fully opaque, the other genres are faded.
genre_opacity = alt.condition(highlight, alt.value(1), alt.value(0.3))

# Streamgraph: areas stacked around the centre (stack='center') with the
# y axis hidden (axis=None).
genre_streams = alt.Chart(movies_cleaned).mark_area(interpolate='basis', line=True)
genre_streams.encode(
    alt.X('year(Release_Date)'),
    alt.Y('sum(US_Gross)', stack='center', axis=None),
    alt.Color('Major_Genre:N'),
    opacity=genre_opacity,
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('Major_Genre'),
        alt.Tooltip('sum(US_Gross)', format=format_gross),
    ],
).add_params(highlight).properties(width=600, height=300).interactive()
heatmap
Code
# Heatmap: day of month (x) against month (y), coloured by total US gross.
release_heatmap = alt.Chart(movies_cleaned).mark_rect()
release_heatmap.encode(
    x=alt.X('date(Release_Date):O', title='Day'),
    y=alt.Y('month(Release_Date):O', title='Month'),
    color=alt.Color('sum(US_Gross)'),
    tooltip=[alt.Tooltip('sum(US_Gross)')],
)